{
 "cells": [
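  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "10d0d538-4ce5-4f49-930f-b4603e7a453b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Optional one-time setup: uncomment the next line to install the FAISS CPU package\n",
    "# into the environment used by this kernel.\n",
    "# %pip install faiss-cpu"
   ]
  },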
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "5c589c8b-46f3-4df7-bd3b-6baae2a63bdb",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain_community.document_loaders import TextLoader\n",
    "from langchain_community.vectorstores import FAISS\n",
    "from langchain_openai.embeddings import OpenAIEmbeddings\n",
    "from langchain_text_splitters import RecursiveCharacterTextSplitter"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "35a2fb84-2d95-4db8-b896-77422e8f938b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the demo text file as LangChain documents.\n",
    "# The file contains Chinese text, so decoding it with the platform's default codec\n",
    "# can fail on systems whose default is not compatible with the file's encoding.\n",
    "# autodetect_encoding=True leaves the normal decoding path unchanged and only adds a\n",
    "# fallback that tries detected encodings when the default decode fails.\n",
    "# NOTE(review): the fallback is expected to rely on the `chardet` package - confirm it\n",
    "# is installed in this environment.\n",
    "loader = TextLoader(\"./demo2.txt\", autodetect_encoding=True)\n",
    "docs = loader.load()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "1806cd71-6ed2-43a2-a3fa-a2d3be393243",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Separators handed to the splitter: paragraph break, line break, Chinese sentence-ending\n",
    "# and clause punctuation, and finally the empty string.\n",
    "split_separators = [\"\\n\\n\", \"\\n\", \"。\", \"！\", \"？\", \"，\", \"、\", \"\"]\n",
    "\n",
    "# Configure the splitter used to cut the loaded documents into chunks.\n",
    "text_splitter = RecursiveCharacterTextSplitter(\n",
    "    separators=split_separators,\n",
    "    chunk_overlap=40,\n",
    "    chunk_size=500,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "bc3db8f3-bb5b-4484-9209-632dccfb9f68",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Run the configured splitter over the loaded documents; `texts` holds the result\n",
    "# that is later indexed in the vector store.\n",
    "texts = text_splitter.split_documents(docs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "01597bf7-0851-4d22-830a-8427687cd12c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create the OpenAI embeddings object with no explicit arguments (library defaults).\n",
    "# NOTE(review): credentials are presumably read from the environment\n",
    "# (e.g. OPENAI_API_KEY) - confirm before running on a fresh machine.\n",
    "embeddings_model = OpenAIEmbeddings()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "3d1e1cf4-7623-4d09-87ce-77bae9057aca",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Build a FAISS vector store from the split documents, using `embeddings_model`\n",
    "# as the embedding function.\n",
    "# NOTE(review): this presumably calls the embeddings API for every chunk - confirm\n",
    "# cost/latency before re-running on a large file.\n",
    "db = FAISS.from_documents(texts, embeddings_model)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "7867aa87-9e97-44ae-bb9a-500d4cd6e080",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Expose the vector store through the retriever interface; no arguments are passed,\n",
    "# so the library's default search settings apply.\n",
    "retriever = db.as_retriever()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "f42c2f7c-e06f-4f97-92f3-ec074baa331a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "“罗浮宫”这个名字的由来有些争议。根据法国百科全书辞典大拉鲁斯百科全书（Grand Larousse encyclopédique）的说法，这个名字来源于与狼狩猎巢穴的联系（拉丁语：lupus，下帝国： lupara）。[6]此后好几个世纪，罗浮宫发生了很大变化。14世纪，法王查理五世觉得罗浮宫堡比位于塞纳河当中之城岛（西岱岛）的西岱宫更适合居住，于是搬迁至此。在他之后的法国国王再度搬出罗浮宫，直至1546年，弗朗索瓦一世才成为居住在罗浮宫的第二位国王。弗朗索瓦一世除了曾从意大利购买了包括油画《蒙娜丽莎》在内的大量艺术品外，还将原始的中世纪建筑夷为平地，命令建筑师皮埃尔·勒柯按照文艺复兴风格对其加以改建，于1546年至1559年修建了今日罗浮宫建筑群最东端的卡利庭院。扩建工程一直持续到亨利二世登基。亨利二世去世后，王太后凯瑟琳·德·美第奇集中力量修建杜伊勒里宫及杜乐丽花园，对罗浮宫的扩建工作再度停止。\n"
     ]
    }
   ],
   "source": [
    "# Query the retriever with a question and print the text of the first returned document.\n",
    "query = \"卢浮宫这个名字怎么来的？\"\n",
    "retrieved_docs = retriever.invoke(query)\n",
    "top_doc = retrieved_docs[0]\n",
    "print(top_doc.page_content)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "ff0ba9e1-3cb1-4eb6-aa8e-9482f9d599a8",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1793年8月10日，即君主制灭亡一周年，罗浮宫正式命名为“中央艺术博物馆”开放。公众每周可以免费进入三天，这在当时被认为是一项重大成就，并受到普遍赞赏，当时博物馆展示了537幅画作和184件艺术品。四分之三来自皇室收藏，其馀来自教堂、贵族和地方政府等处没收来的艺术品。[8]同时为了扩大和组织收藏，共和国每年投入约100,000里弗维持博物馆的经营，并在1794年开始开始从北欧和欧洲等地带来作品，在法国大革命战争期间与教宗签署的《托伦蒂诺条约》（1797 年）之后，来自梵蒂冈的作品，如劳孔群像和观景殿的阿波罗也曾在卢浮宫展出。[9][10]\n",
      "\n",
      "拿破仑时代\n"
     ]
    }
   ],
   "source": [
    "# Query the retriever with a second question and print the text of the first returned document.\n",
    "query = \"卢浮宫在哪年被命名为中央艺术博物馆\"\n",
    "retrieved_docs = retriever.invoke(query)\n",
    "top_doc = retrieved_docs[0]\n",
    "print(top_doc.page_content)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9e284b73-afe2-49b2-94ce-d2e8b0850051",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
